# Settings for the 'Text2ImUNet' network operating at image_size=64.
# This dict is plugged into the top-level ``model`` config under the ``unet``
# key further down in this file.
# NOTE(review): ``text_ctx`` and the ``xf_*`` entries presumably configure the
# text-encoder transformer inside the UNet -- confirm against the
# ``Text2ImUNet`` implementation.
unet_cfg = {
    'type': 'Text2ImUNet',
    'image_size': 64,
    'base_channels': 192,
    'in_channels': 3,
    'resblocks_per_downsample': 3,
    'attention_res': (32, 16, 8),
    'norm_cfg': {'type': 'GN32', 'num_groups': 32},
    'dropout': 0.1,
    'num_classes': 0,
    'use_fp16': False,
    'resblock_updown': True,
    # ``encoder_channels`` carries the same value (512) as ``xf_width`` below;
    # presumably the two must stay in sync -- TODO confirm.
    'attention_cfg': {
        'type': 'MultiHeadAttentionBlock',
        'num_heads': 1,
        'num_head_channels': 64,
        'use_new_attention_order': False,
        'encoder_channels': 512,
    },
    'use_scale_shift_norm': True,
    'text_ctx': 128,
    'xf_width': 512,
    'xf_layers': 16,
    'xf_heads': 8,
    'xf_final_ln': True,
    'xf_padding': True,
}
# Settings for the 'SuperResText2ImUNet' network operating at image_size=256.
# This dict is plugged into the top-level ``model`` config under the
# ``unet_up`` key further down in this file.
# NOTE(review): ``text_ctx`` and the ``xf_*`` entries presumably configure the
# text-encoder transformer inside the UNet -- confirm against the
# ``SuperResText2ImUNet`` implementation.
unet_up_cfg = {
    'type': 'SuperResText2ImUNet',
    'image_size': 256,
    'base_channels': 192,
    'in_channels': 3,
    # NOTE(review): 'FIXED' presumably selects a fixed (non-learned) variance
    # output head -- verify against the UNet's ``output_cfg`` handling.
    'output_cfg': {'var': 'FIXED'},
    'resblocks_per_downsample': 2,
    'attention_res': (32, 16, 8),
    'norm_cfg': {'type': 'GN32', 'num_groups': 32},
    'dropout': 0.1,
    'num_classes': 0,
    'use_fp16': False,
    'resblock_updown': True,
    # ``encoder_channels`` carries the same value (512) as ``xf_width`` below;
    # presumably the two must stay in sync -- TODO confirm.
    'attention_cfg': {
        'type': 'MultiHeadAttentionBlock',
        'num_heads': 1,
        'num_head_channels': 64,
        'use_new_attention_order': False,
        'encoder_channels': 512,
    },
    'use_scale_shift_norm': True,
    'text_ctx': 128,
    'xf_width': 512,
    'xf_layers': 16,
    'xf_heads': 8,
    'xf_final_ln': True,
    'xf_padding': True,
}

# Top-level 'Glide' model config. It wires two UNets (``unet`` and
# ``unet_up``), each paired with its own 'EditDDIMScheduler'; the two
# schedulers differ only in ``beta_schedule``.
model = {
    'type': 'Glide',
    'data_preprocessor': {
        'type': 'DataPreprocessor',
        'mean': [127.5],
        'std': [127.5],
    },
    # First network/scheduler pair ('squaredcos_cap_v2' beta schedule).
    'unet': unet_cfg,
    'diffusion_scheduler': {
        'type': 'EditDDIMScheduler',
        'variance_type': 'learned_range',
        'beta_schedule': 'squaredcos_cap_v2',
    },
    # Second network/scheduler pair ('linear' beta schedule).
    'unet_up': unet_up_cfg,
    'diffusion_scheduler_up': {
        'type': 'EditDDIMScheduler',
        'variance_type': 'learned_range',
        'beta_schedule': 'linear',
    },
    'use_fp16': False,
}
